Multi-breed GRM
breed_key <-
list.files(here::here("data/derived_data/sample_selection/ds_plink/"), pattern = ".fam", full.names = TRUE) %>%
set_names(nm = (basename(.) %>%
tools::file_path_sans_ext())) %>%
map_df(read_table2,
col_names = FALSE, .id = "pop") %>%
rename(id = X1) %>%
select(pop, id) %>%
mutate(pop = str_remove(pop, "\\.ds_plink"))
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: changed 1039 values (100%) of 'pop' (0 new NA)
big_grm_long <-
read_table2(here::here(
str_c(
"data/derived_data/sample_selection/ds_grm_big/ds_grm_big.sXX.txt"
)
),
col_names = FALSE) %>%
#removes random last column that's all 1s
select_if(is.numeric) %>%
#Read in the fam file
bind_cols(read_table2(here::here(
str_c(
"data/derived_data/sample_selection/ds_plink_big/ds_plink_big.fam"
)
),
col_names = FALSE) %>%
#select only first column of fam file (international ids
select(X1) %>%
rename(ind1 = X1)) %>%
#make row names international id
tibble::column_to_rownames("ind1") %>%
#make column names international id
rlang::set_names(rownames(.)) %>%
tibble::rownames_to_column(var = "ind1") %>%
reshape2::melt(id = c("ind1")) %>%
rename(ind2 = variable) %>%
left_join(breed_key, by = c("ind1" = "id")) %>%
filter(pop != "limousin") %>%
rename(ind1_pop = pop) %>%
left_join(breed_key, by = c("ind2" = "id")) %>%
rename(ind2_pop = pop)
## select_if: dropped one variable (X1040)
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## left_join: added 0 rows and added one column (pop)
## filter: removed 52989 rows (5%)
## left_join: added 0 rows and added one column (pop)
Plot multi-breed GRM
big_grm_long %>%
ggplot(aes(x = ind1,
y = ind2,
fill = value)) +
geom_tile() +
viridis::scale_fill_viridis(
option = "inferno",
direction = -1) +
theme(
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank()
) +
labs(
x = "Individual 1",
y = "Individual 2",
title = str_c("Pairwise genomic relatedness pre-pruning: multi-breed (all individuals)")
)

ggsave(here::here(str_c("figures/sample_selection/big_grm.png")))
## Saving 7 x 5 in image
Try re-plotting excluding Brahman
big_grm_long %>%
filter(ind1_pop != "brahman") %>%
filter(ind2_pop != "brahman") %>%
ggplot(aes(x = ind1,
y = ind2,
fill = value)) +
geom_tile() +
viridis::scale_fill_viridis(
option = "inferno",
direction = -1) +
theme(
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank()
) +
labs(
x = "Individual 1",
y = "Individual 2",
title = str_wrap("Pairwise genomic relatedness pre-pruning: multi-breed (all individuals excluding Brahman)", width = 45)
)
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)

ggsave(here::here(str_c("figures/sample_selection/big_grm_no_brm.png")))
## Saving 7 x 5 in image
Plot multi-breed GRM split by breed
#Probably a way to do this with map and purrr but I'm lazy and can't get map to take a df as an argument
map(list("holstein", "angus", "simmental", "jersey", "hereford", "charolais"), plot_big_grm)
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)
## filter: removed 666699 rows (73%)
## filter: removed 180804 rows (75%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)
## filter: removed 641245 rows (71%)
## filter: removed 192738 rows (72%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)
## filter: removed 779284 rows (86%)
## filter: removed 111804 rows (87%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)
## filter: removed 812570 rows (89%)
## filter: removed 86338 rows (90%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)
## filter: removed 819423 rows (90%)
## filter: removed 80808 rows (91%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)
## filter: removed 823339 rows (91%)
## filter: removed 77604 rows (91%)
## Saving 7 x 5 in image
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
Plot distribution of GRM values
big_grm_long %>%
filter(ind1_pop == ind2_pop) %>%
filter(ind1 != ind2) %>%
mutate(ind1_pop = str_to_title(ind1_pop)) %>%
ggplot(aes(x = value,
y = forcats::fct_reorder(ind1_pop, value, mean),
#fill = forcats::fct_reorder(ind1_pop, value, mean, .desc = TRUE))) +
fill = ind1_pop)) +
ggridges::geom_density_ridges(alpha = 0.5) +
guides(fill = FALSE) +
geom_vline(aes(xintercept = mean(value))) +
labs(x = "GRM value", y = "Kernel density", title = str_wrap("Density of pairwise genomic relatedness pre-pruning by breed (Brahman included)", width = 50))
## filter: removed 844516 rows (82%)
## filter: removed 988 rows (1%)
## mutate: changed 181028 values (100%) of 'ind1_pop' (0 new NA)
## Picking joint bandwidth of 0.0218

cowplot::ggsave(here::here("figures/sample_selection/big_grm_density_w_brm.png"), width = 10, height = 7, dpi = 500)
## Picking joint bandwidth of 0.0218
big_grm_long %>%
filter(ind1_pop == ind2_pop) %>%
filter(ind1 != ind2) %>%
filter(ind1_pop != "brahman") %>%
filter(ind2_pop != "brahman") %>%
mutate(ind1_pop = str_to_title(ind1_pop)) %>%
ggplot(aes(x = value,
y = forcats::fct_reorder(ind1_pop, value, mean),
#fill = forcats::fct_reorder(ind1_pop, value, mean, .desc = TRUE))) +
fill = ind1_pop)) +
ggridges::geom_density_ridges(alpha = 0.5) +
guides(fill = FALSE) +
geom_vline(aes(xintercept = mean(value))) +
labs(x = "GRM value", y = "Kernel density", fill = "Breed", title = str_wrap("Density of pairwise genomic relatedness pre-pruning by breed (Brahman excluded)", width = 50))
## filter: removed 844516 rows (82%)
## filter: removed 988 rows (1%)
## filter: removed 3540 rows (2%)
## filter: no rows removed
## mutate: changed 177488 values (100%) of 'ind1_pop' (0 new NA)
## Picking joint bandwidth of 0.0063

cowplot::ggsave(here::here("figures/sample_selection/big_grm_density_no_brm.png"), width = 10, height = 7, dpi = 500)
## Picking joint bandwidth of 0.0063
Probing multi-breed GRM
Top 200 highest values
big_grm_long %>%
filter(ind1_pop != "brahman") %>%
filter(ind2_pop != "brahman") %>%
top_n(200, value) %>%
arrange(desc(value))
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)
## ind1 ind2 value ind1_pop ind2_pop
## 1 CHAUSAM000VPM122718 CHAUSAM000VPM122718 2.5706042 charolais charolais
## 2 UMCUSAU000000194370 UMCUSAU000000194370 2.0339775 charolais charolais
## 3 CHAUSAM00000M225504 CHAUSAM00000M225504 1.3357560 charolais charolais
## 4 CHAUSAM00000M319454 CHAUSAM00000M319454 1.2878654 charolais charolais
## 5 CHAUSAM000VPM122718 CHAUSAM00000M225504 1.2679501 charolais charolais
## 6 CHAUSAM00000M225504 CHAUSAM000VPM122718 1.2679501 charolais charolais
## 7 CHAUSAM000VPM122718 CHAUSAM00000M319454 1.1819029 charolais charolais
## 8 CHAUSAM00000M319454 CHAUSAM000VPM122718 1.1819029 charolais charolais
## 9 CHAUSAM00000M234430 CHAUSAM00000M234430 1.0737484 charolais charolais
## 10 UMCUSAU000000194368 UMCUSAU000000194368 1.0615928 charolais charolais
## 11 UMCUSAU000000194369 UMCUSAU000000194369 0.9810364 charolais charolais
## 12 CHACANM0000RMC30160 CHACANM0000RMC30160 0.9398123 charolais charolais
## 13 CHAUSAM00000M319454 CHAUSAM00000M225504 0.9378336 charolais charolais
## 14 CHAUSAM00000M225504 CHAUSAM00000M319454 0.9378336 charolais charolais
## 15 UMCUSAU000000194657 UMCUSAU000000194657 0.9271013 jersey jersey
## 16 CHACANM0000MC106929 CHACANM0000MC106929 0.9252602 charolais charolais
## 17 CHACANM0000MC236083 CHACANM0000MC236083 0.8828932 charolais charolais
## 18 UMCUSAU000000194349 UMCUSAU000000194349 0.8775803 angus angus
## 19 UMCUSAM000000196786 UMCUSAM000000196786 0.8730899 holstein holstein
## 20 UMCUSAM000000196776 UMCUSAM000000196776 0.8587716 holstein holstein
## 21 RANUSAM000003358146 RANUSAM000003358146 0.8371725 simmental simmental
## 22 UMCUSAU000000198102 UMCUSAU000000198102 0.8320777 hereford hereford
## 23 HOL840M003131131453 HOL840M003131131453 0.8317153 holstein holstein
## 24 CHAUSAM0000MC190391 CHAUSAM0000MC190391 0.8270637 charolais charolais
## 25 UMCUSAF000000199724 UMCUSAF000000199724 0.8238097 hereford hereford
## 26 CHACANM0000MC293022 CHACANM0000MC293022 0.8134152 charolais charolais
## 27 SIMCANM000000361254 SIMCANM000000361254 0.8078976 simmental simmental
## 28 UMCUSAM000000198548 UMCUSAM000000198548 0.7985101 simmental simmental
## 29 UMCUSAU000000194332 UMCUSAU000000194332 0.7934186 angus angus
## 30 SIMUSAM000002068996 SIMUSAM000002068996 0.7900628 simmental simmental
## 31 CHAFRAM005882101816 CHAFRAM005882101816 0.7876660 charolais charolais
## 32 HERUSAM000014661058 HERUSAM000014661058 0.7842708 hereford hereford
## 33 HOLUSAM003126477819 HOLUSAM003126477819 0.7809691 holstein holstein
## 34 UMCUSAM000000196847 UMCUSAM000000196847 0.7792097 holstein holstein
## 35 CHAUSAM000VPM122718 CHAUSAM00000M234430 0.7694200 charolais charolais
## 36 CHAUSAM00000M234430 CHAUSAM000VPM122718 0.7694200 charolais charolais
## 37 SIMUSAM000001716917 SIMUSAM000001716917 0.7689839 simmental simmental
## 38 CHACANM0000MC299727 CHACANM0000MC299727 0.7665262 charolais charolais
## 39 UMCUSAU000000194752 UMCUSAU000000194752 0.7616109 simmental simmental
## 40 CHACANM0000MC226738 CHACANM0000MC226738 0.7569395 charolais charolais
## 41 SIMUSAM000001907954 SIMUSAM000001907954 0.7530423 simmental simmental
## 42 HOL840M003129016258 HOL840M003129016258 0.7520021 holstein holstein
## 43 UMCUSAU000000194373 UMCUSAU000000194373 0.7504952 charolais charolais
## 44 SIMDEUM000919598352 SIMDEUM000919598352 0.7488988 simmental simmental
## 45 HOLUSAM000070626136 HOLUSAM000070626136 0.7458174 holstein holstein
## 46 HOLCANM000109538236 HOLCANM000109538236 0.7401696 holstein holstein
## 47 UMCUSAU000000194847 UMCUSAU000000194847 0.7389349 simmental simmental
## 48 UMCUSAU000000194805 UMCUSAU000000194805 0.7373065 simmental simmental
## 49 UMCUSAU000000194372 UMCUSAU000000194372 0.7356412 charolais charolais
## 50 UMCUSAM000000196842 UMCUSAM000000196842 0.7320852 holstein holstein
## 51 HOLUSAM000002070579 HOLUSAM000002070579 0.7304240 holstein holstein
## 52 UMCUSAU000000194820 UMCUSAU000000194820 0.7170784 charolais charolais
## 53 SIMUSAM000001937373 SIMUSAM000001937373 0.7163587 simmental simmental
## 54 JERUSAM000000652501 JERUSAM000000652501 0.7162488 jersey jersey
## 55 UMCUSAU000000194623 UMCUSAU000000194623 0.7150313 jersey jersey
## 56 HOLUSAM000072826907 HOLUSAM000072826907 0.7137188 holstein holstein
## 57 SIMCANM000000216954 SIMCANM000000216954 0.7134835 simmental simmental
## 58 HOLFRAM004493050102 HOLFRAM004493050102 0.7134118 holstein holstein
## 59 UMCUSAM000000196784 UMCUSAM000000196784 0.7113015 holstein holstein
## 60 SIMDEUM007600026785 SIMDEUM007600026785 0.7097411 simmental simmental
## 61 UMCUSAU000000194623 JERUSAM000000652501 0.7091562 jersey jersey
## 62 JERUSAM000000652501 UMCUSAU000000194623 0.7091562 jersey jersey
## 63 CHAUSAM00000M434790 CHAUSAM00000M434790 0.7081365 charolais charolais
## 64 HOLUSAM003009329221 HOLUSAM003009329221 0.6998858 holstein holstein
## 65 HERUSAM000042800895 HERUSAM000042800895 0.6955825 hereford hereford
## 66 SIMDEUM000933038755 SIMDEUM000933038755 0.6930524 simmental simmental
## 67 HOLUSAM000072495715 HOLUSAM000072495715 0.6924689 holstein holstein
## 68 UMCUSAF000000109173 UMCUSAF000000109173 0.6920664 holstein holstein
## 69 JER199M000071199883 JER199M000071199883 0.6917290 jersey jersey
## 70 UMCUSAU000000194669 UMCUSAU000000194669 0.6914219 jersey jersey
## 71 UMCUSAU000000194643 UMCUSAU000000194643 0.6876487 jersey jersey
## 72 UMCUSAM000000196808 UMCUSAM000000196808 0.6873916 holstein holstein
## 73 UMCUSAU000000194652 UMCUSAU000000194652 0.6854573 jersey jersey
## 74 HERCANM000000C01369 HERCANM000000C01369 0.6854528 hereford hereford
## 75 SIMDEUM000915040032 SIMDEUM000915040032 0.6853930 simmental simmental
## 76 UMCUSAU000000194718 UMCUSAU000000194718 0.6814544 angus angus
## 77 UMCUSAM000000196803 UMCUSAM000000196803 0.6809952 holstein holstein
## 78 UMCUSAU000000194830 UMCUSAU000000194830 0.6803465 hereford hereford
## 79 CHACANM0000MC250513 CHACANM0000MC250513 0.6795258 charolais charolais
## 80 HERCANM000C02789138 HERCANM000C02789138 0.6786716 hereford hereford
## 81 SIMUSAM000000320938 SIMUSAM000000320938 0.6770065 simmental simmental
## 82 HOLUSAM000074284017 HOLUSAM000074284017 0.6768242 holstein holstein
## 83 UMCUSAU000000194250 UMCUSAU000000194250 0.6767167 angus angus
## 84 HOLUSAM003128824393 HOLUSAM003128824393 0.6757989 holstein holstein
## 85 UMCUSAU000000194335 UMCUSAU000000194335 0.6754384 jersey jersey
## 86 SIMUSAM000001818026 SIMUSAM000001818026 0.6748551 simmental simmental
## 87 SIMUSAM000002240471 SIMUSAM000002240471 0.6717169 simmental simmental
## 88 UMCUSAU000000198090 UMCUSAU000000198090 0.6715470 simmental simmental
## 89 UMCUSAU000000194185 UMCUSAU000000194185 0.6714456 holstein holstein
## 90 UMCUSAM000000196827 UMCUSAM000000196827 0.6709645 holstein holstein
## 91 CHAUSAM00000M411450 CHAUSAM00000M411450 0.6709625 charolais charolais
## 92 UMCUSAU000000194670 UMCUSAU000000194670 0.6708354 jersey jersey
## 93 UMCUSAM000000087954 UMCUSAM000000087954 0.6707806 hereford hereford
## 94 UMCUSAU000000194760 UMCUSAU000000194760 0.6705456 hereford hereford
## 95 UMCUSAU000000194840 UMCUSAU000000194840 0.6694463 simmental simmental
## 96 UMCUSAU000000194665 UMCUSAU000000194665 0.6687646 jersey jersey
## 97 CHAUSAM00000M318119 CHAUSAM00000M318119 0.6674392 charolais charolais
## 98 UMCUSAU000000194718 UMCUSAU000000194250 0.6666666 angus angus
## 99 UMCUSAU000000194250 UMCUSAU000000194718 0.6666666 angus angus
## 100 UMCUSAM000000092750 UMCUSAM000000092750 0.6659566 charolais charolais
## 101 SIMUSAM000000006084 SIMUSAM000000006084 0.6646682 simmental simmental
## 102 SIMDEUM000929189864 SIMDEUM000929189864 0.6643219 simmental simmental
## 103 UMCUSAU000000194663 UMCUSAU000000194663 0.6638102 jersey jersey
## 104 UMCUSAU000000194666 UMCUSAU000000194666 0.6634736 jersey jersey
## 105 HOLUSAM000074564764 HOLUSAM000074564764 0.6634068 holstein holstein
## 106 UMCUSAM000000196831 UMCUSAM000000196831 0.6623711 holstein holstein
## 107 SIMDEUM000912851741 SIMDEUM000912851741 0.6612683 simmental simmental
## 108 UMCUSAU000000194829 UMCUSAU000000194829 0.6599993 simmental simmental
## 109 UMCUSAM000000198521 UMCUSAM000000198521 0.6592910 angus angus
## 110 UMCUSAU000000198082 UMCUSAU000000198082 0.6590175 simmental simmental
## 111 SIMDEUM000918912889 SIMDEUM000918912889 0.6586103 simmental simmental
## 112 SIMDEUM000932739095 SIMDEUM000932739095 0.6581896 simmental simmental
## 113 UMCUSAM000000198543 UMCUSAM000000198543 0.6572667 simmental simmental
## 114 HERCANM000C02698670 HERCANM000C02698670 0.6563364 hereford hereford
## 115 UMCUSAM000000198530 UMCUSAM000000198530 0.6560225 hereford hereford
## 116 UMCUSAF000000199727 UMCUSAF000000199727 0.6559463 hereford hereford
## 117 UMCUSAU000000194266 UMCUSAU000000194266 0.6556448 angus angus
## 118 HOL840M003130854065 HOL840M003130854065 0.6548750 holstein holstein
## 119 UMCUSAM000000196812 UMCUSAM000000196812 0.6539292 holstein holstein
## 120 HERUSAM000015587538 HERUSAM000015587538 0.6524675 hereford hereford
## 121 JERAUSM000A00000810 JERAUSM000A00000810 0.6521554 jersey jersey
## 122 UMCUSAU000000194266 UMCUSAM000000198521 0.6518054 angus angus
## 123 UMCUSAM000000198521 UMCUSAU000000194266 0.6518054 angus angus
## 124 UMCUSAU000000194613 UMCUSAU000000194613 0.6512466 jersey jersey
## 125 HOLUSAM000074228150 HOLUSAM000074228150 0.6505504 holstein holstein
## 126 UMCUSAU000000194616 UMCUSAU000000194616 0.6504354 jersey jersey
## 127 HOLFRAM005694028588 HOLFRAM005694028588 0.6497807 holstein holstein
## 128 UMCUSAU000000194637 UMCUSAU000000194637 0.6491219 jersey jersey
## 129 SIMCANM000000248382 SIMCANM000000248382 0.6490407 simmental simmental
## 130 JERAUSM000A00008529 JERAUSM000A00008529 0.6489935 jersey jersey
## 131 UMCUSAU000000194750 UMCUSAU000000194750 0.6483195 charolais charolais
## 132 AANUSAM000016447771 AANUSAM000016447771 0.6482150 angus angus
## 133 UMCUSAU000000194831 UMCUSAU000000194831 0.6480902 charolais charolais
## 134 UMCUSAU000000194613 JERAUSM000A00000810 0.6477522 jersey jersey
## 135 JERAUSM000A00000810 UMCUSAU000000194613 0.6477522 jersey jersey
## 136 UMCUSAU000000198089 UMCUSAU000000198089 0.6476445 simmental simmental
## 137 UMCUSAU000000194783 UMCUSAU000000194783 0.6469001 hereford hereford
## 138 UMCUSAU000000194658 UMCUSAU000000194658 0.6459441 jersey jersey
## 139 UMCUSAU000000194185 HOLFRAM004493050102 0.6456350 holstein holstein
## 140 HOLFRAM004493050102 UMCUSAU000000194185 0.6456350 holstein holstein
## 141 SIMDEUM000979317838 SIMDEUM000979317838 0.6455098 simmental simmental
## 142 UMCUSAU000000194617 UMCUSAU000000194617 0.6452341 jersey jersey
## 143 UMCUSAU000000194573 UMCUSAU000000194573 0.6445948 holstein holstein
## 144 UMCUSAU000000194616 JERAUSM000A00008529 0.6443452 jersey jersey
## 145 JERAUSM000A00008529 UMCUSAU000000194616 0.6443452 jersey jersey
## 146 HOL840M003124584834 HOL840M003124584834 0.6439167 holstein holstein
## 147 JERAUSM000A00011730 JERAUSM000A00011730 0.6438515 jersey jersey
## 148 HOLUSAM003125201927 HOLUSAM003125201927 0.6436275 holstein holstein
## 149 UMCUSAM000000196763 UMCUSAM000000196763 0.6433089 holstein holstein
## 150 CHACANM00000FMC5641 CHACANM00000FMC5641 0.6430507 charolais charolais
## 151 UMCUSAU000000194341 UMCUSAU000000194341 0.6427181 jersey jersey
## 152 SIMUSAM000000000001 SIMUSAM000000000001 0.6395406 simmental simmental
## 153 AANUSAM000018365756 AANUSAM000018365756 0.6385023 angus angus
## 154 UMCUSAU000000194617 JERAUSM000A00011730 0.6384078 jersey jersey
## 155 JERAUSM000A00011730 UMCUSAU000000194617 0.6384078 jersey jersey
## 156 UMCUSAU000000194625 UMCUSAU000000194625 0.6381224 jersey jersey
## 157 UMCUSAU000000194827 UMCUSAU000000194827 0.6372269 charolais charolais
## 158 UMCUSAU000000194273 UMCUSAU000000194273 0.6361041 hereford hereford
## 159 UMCUSAU000000194842 UMCUSAU000000194842 0.6360503 simmental simmental
## 160 HERCANM000C02728663 HERCANM000C02728663 0.6356617 hereford hereford
## 161 UMCUSAU000000194786 UMCUSAU000000194786 0.6345374 charolais charolais
## 162 SIMDEUM000938263111 SIMDEUM000938263111 0.6344897 simmental simmental
## 163 UMCUSAM000000196826 UMCUSAM000000196826 0.6339579 holstein holstein
## 164 UMCUSAU000000194621 UMCUSAU000000194621 0.6329732 jersey jersey
## 165 CHAUSAM00000M297007 CHAUSAM00000M297007 0.6327429 charolais charolais
## 166 SIMUSAM000002144976 SIMUSAM000002144976 0.6324821 simmental simmental
## 167 HOLUSAM000071451889 HOLUSAM000071451889 0.6323924 holstein holstein
## 168 JERUSAM000000650436 JERUSAM000000650436 0.6323838 jersey jersey
## 169 SIMUSAM000000000010 SIMUSAM000000000010 0.6319438 simmental simmental
## 170 CHAFRAM005872122876 CHAFRAM005872122876 0.6311314 charolais charolais
## 171 HERCANM000C02738219 HERCANM000C02738219 0.6308122 hereford hereford
## 172 SIMUSAM000001282876 SIMUSAM000001282876 0.6307437 simmental simmental
## 173 UMCUSAU000000194738 UMCUSAU000000194738 0.6305916 hereford hereford
## 174 AANUSAM000009506886 AANUSAM000009506886 0.6304343 angus angus
## 175 SIMDEUM000912851233 SIMDEUM000912851233 0.6302727 simmental simmental
## 176 UMCUSAU000000194660 UMCUSAU000000194660 0.6299851 jersey jersey
## 177 HOLUSAM003125519831 HOLUSAM003125519831 0.6299163 holstein holstein
## 178 SIMCANM000000293252 SIMCANM000000293252 0.6298893 simmental simmental
## 179 SIMDEUM000913325437 SIMDEUM000913325437 0.6297151 simmental simmental
## 180 HOL840M003128792954 HOL840M003128792954 0.6296093 holstein holstein
## 181 UMCUSAU000000198080 UMCUSAU000000198080 0.6293500 simmental simmental
## 182 AANUSAM000015330743 AANUSAM000015330743 0.6289334 angus angus
## 183 UMCUSAU000000194621 JERUSAM000000650436 0.6281390 jersey jersey
## 184 JERUSAM000000650436 UMCUSAU000000194621 0.6281390 jersey jersey
## 185 UMCUSAU000000194768 UMCUSAU000000194768 0.6277001 charolais charolais
## 186 AANNZLM001217000784 AANNZLM001217000784 0.6262946 angus angus
## 187 UMCUSAU000000194792 UMCUSAU000000194792 0.6260797 simmental simmental
## 188 UMCUSAU000000194627 UMCUSAU000000194627 0.6260069 jersey jersey
## 189 UMCUSAU000000198081 UMCUSAU000000198081 0.6259564 simmental simmental
## 190 UMCUSAU000000194635 UMCUSAU000000194635 0.6245757 jersey jersey
## 191 HOLUSAM003128590796 HOLUSAM003128590796 0.6244550 holstein holstein
## 192 HOLUSAM003129037765 HOLUSAM003129037765 0.6240283 holstein holstein
## 193 SIMUSAM000002002092 SIMUSAM000002002092 0.6229744 simmental simmental
## 194 UMCUSAU000000194848 UMCUSAU000000194848 0.6227866 holstein holstein
## 195 UMCUSAU000000194668 UMCUSAU000000194668 0.6219642 jersey jersey
## 196 UMCUSAU000000194800 UMCUSAU000000194800 0.6217710 simmental simmental
## 197 UMCUSAU000000194630 UMCUSAU000000194630 0.6213933 jersey jersey
## 198 UMCUSAM000000198537 UMCUSAM000000198537 0.6213582 angus angus
## 199 SIMDEUM000929276244 SIMDEUM000929276244 0.6213490 simmental simmental
## 200 CHAUSAM00000M246564 CHAUSAM00000M246564 0.6205132 charolais charolais
#excluding diagonal
big_grm_long %>%
filter(ind1_pop != "brahman") %>%
filter(ind2_pop != "brahman") %>%
filter(ind1 != ind2) %>%
top_n(200, value) %>%
arrange(desc(value))
## filter: removed 62340 rows (6%)
## filter: removed 55680 rows (6%)
## filter: removed 928 rows (<1%)
## ind1 ind2 value ind1_pop ind2_pop
## 1 CHAUSAM000VPM122718 CHAUSAM00000M225504 1.2679501 charolais charolais
## 2 CHAUSAM00000M225504 CHAUSAM000VPM122718 1.2679501 charolais charolais
## 3 CHAUSAM000VPM122718 CHAUSAM00000M319454 1.1819029 charolais charolais
## 4 CHAUSAM00000M319454 CHAUSAM000VPM122718 1.1819029 charolais charolais
## 5 CHAUSAM00000M319454 CHAUSAM00000M225504 0.9378336 charolais charolais
## 6 CHAUSAM00000M225504 CHAUSAM00000M319454 0.9378336 charolais charolais
## 7 CHAUSAM000VPM122718 CHAUSAM00000M234430 0.7694200 charolais charolais
## 8 CHAUSAM00000M234430 CHAUSAM000VPM122718 0.7694200 charolais charolais
## 9 UMCUSAU000000194623 JERUSAM000000652501 0.7091562 jersey jersey
## 10 JERUSAM000000652501 UMCUSAU000000194623 0.7091562 jersey jersey
## 11 UMCUSAU000000194718 UMCUSAU000000194250 0.6666666 angus angus
## 12 UMCUSAU000000194250 UMCUSAU000000194718 0.6666666 angus angus
## 13 UMCUSAU000000194266 UMCUSAM000000198521 0.6518054 angus angus
## 14 UMCUSAM000000198521 UMCUSAU000000194266 0.6518054 angus angus
## 15 UMCUSAU000000194613 JERAUSM000A00000810 0.6477522 jersey jersey
## 16 JERAUSM000A00000810 UMCUSAU000000194613 0.6477522 jersey jersey
## 17 UMCUSAU000000194185 HOLFRAM004493050102 0.6456350 holstein holstein
## 18 HOLFRAM004493050102 UMCUSAU000000194185 0.6456350 holstein holstein
## 19 UMCUSAU000000194616 JERAUSM000A00008529 0.6443452 jersey jersey
## 20 JERAUSM000A00008529 UMCUSAU000000194616 0.6443452 jersey jersey
## 21 UMCUSAU000000194617 JERAUSM000A00011730 0.6384078 jersey jersey
## 22 JERAUSM000A00011730 UMCUSAU000000194617 0.6384078 jersey jersey
## 23 UMCUSAU000000194621 JERUSAM000000650436 0.6281390 jersey jersey
## 24 JERUSAM000000650436 UMCUSAU000000194621 0.6281390 jersey jersey
## 25 SIMUSAM000001907954 SIMUSAM000001716917 0.6161832 simmental simmental
## 26 SIMUSAM000001716917 SIMUSAM000001907954 0.6161832 simmental simmental
## 27 HOLCANM000109538236 HOL840M003131131453 0.6147859 holstein holstein
## 28 HOL840M003131131453 HOLCANM000109538236 0.6147859 holstein holstein
## 29 UMCUSAU000000194614 JERAUSM000A00001716 0.6137056 jersey jersey
## 30 JERAUSM000A00001716 UMCUSAU000000194614 0.6137056 jersey jersey
## 31 HOLUSAM003128824393 HOLCANM000109538236 0.6122493 holstein holstein
## 32 HOLCANM000109538236 HOLUSAM003128824393 0.6122493 holstein holstein
## 33 UMCUSAU000000194611 JERAUSM000A00000734 0.6101062 jersey jersey
## 34 JERAUSM000A00000734 UMCUSAU000000194611 0.6101062 jersey jersey
## 35 UMCUSAM000000196852 UMCUSAM000000196809 0.5953712 holstein holstein
## 36 UMCUSAM000000196809 UMCUSAM000000196852 0.5953712 holstein holstein
## 37 UMCUSAU000000194720 UMCUSAU000000194252 0.5923813 angus angus
## 38 UMCUSAU000000194252 UMCUSAU000000194720 0.5923813 angus angus
## 39 UMCUSAM000000196851 UMCUSAM000000196773 0.5895674 holstein holstein
## 40 UMCUSAM000000196773 UMCUSAM000000196851 0.5895674 holstein holstein
## 41 UMCUSAU000000194612 JERAUSM000A00000747 0.5881447 jersey jersey
## 42 JERAUSM000A00000747 UMCUSAU000000194612 0.5881447 jersey jersey
## 43 UMCUSAM000000198519 JERAUSM000A00010153 0.5872306 jersey jersey
## 44 JERAUSM000A00010153 UMCUSAM000000198519 0.5872306 jersey jersey
## 45 UMCUSAU000000194370 UMCUSAU000000194368 0.5843120 charolais charolais
## 46 UMCUSAU000000194368 UMCUSAU000000194370 0.5843120 charolais charolais
## 47 UMCUSAU000000194619 JERUSAM000000646877 0.5825573 jersey jersey
## 48 JERUSAM000000646877 UMCUSAU000000194619 0.5825573 jersey jersey
## 49 HOLUSAM000071451889 HOLUSAM000070626136 0.5810284 holstein holstein
## 50 HOLUSAM000070626136 HOLUSAM000071451889 0.5810284 holstein holstein
## 51 UMCUSAU000000194727 UMCUSAU000000194259 0.5807513 angus angus
## 52 UMCUSAU000000194259 UMCUSAU000000194727 0.5807513 angus angus
## 53 UMCUSAU000000194657 UMCUSAU000000194623 0.5780375 jersey jersey
## 54 UMCUSAU000000194623 UMCUSAU000000194657 0.5780375 jersey jersey
## 55 UMCUSAU000000194657 JERUSAM000000652501 0.5755013 jersey jersey
## 56 JERUSAM000000652501 UMCUSAU000000194657 0.5755013 jersey jersey
## 57 HOLUSAM003131131371 HOL840M003131131371 0.5752966 holstein holstein
## 58 HOL840M003131131371 HOLUSAM003131131371 0.5752966 holstein holstein
## 59 UMCUSAU000000194615 JERAUSM000A00003096 0.5705881 jersey jersey
## 60 JERAUSM000A00003096 UMCUSAU000000194615 0.5705881 jersey jersey
## 61 UMCUSAU000000194715 UMCUSAU000000194247 0.5698926 angus angus
## 62 UMCUSAU000000194247 UMCUSAU000000194715 0.5698926 angus angus
## 63 UMCUSAU000000194731 UMCUSAU000000194264 0.5686411 angus angus
## 64 UMCUSAU000000194264 UMCUSAU000000194731 0.5686411 angus angus
## 65 HOLUSAM003125201927 HOLCANM000109538236 0.5670664 holstein holstein
## 66 HOLCANM000109538236 HOLUSAM003125201927 0.5670664 holstein holstein
## 67 UMCUSAU000000194620 JERUSAM000000649797 0.5647312 jersey jersey
## 68 JERUSAM000000649797 UMCUSAU000000194620 0.5647312 jersey jersey
## 69 UMCUSAU000000194624 JERUSAM000000665185 0.5646840 jersey jersey
## 70 JERUSAM000000665185 UMCUSAU000000194624 0.5646840 jersey jersey
## 71 UMCUSAM000000196791 UMCUSAM000000196782 0.5638757 holstein holstein
## 72 UMCUSAM000000196782 UMCUSAM000000196791 0.5638757 holstein holstein
## 73 UMCUSAU000000194618 JERUSAM000000644410 0.5604355 jersey jersey
## 74 JERUSAM000000644410 UMCUSAU000000194618 0.5604355 jersey jersey
## 75 HOLUSAM003009329221 HOL840M003131131453 0.5558447 holstein holstein
## 76 HOL840M003131131453 HOLUSAM003009329221 0.5558447 holstein holstein
## 77 AANUSAM000007501542 AANUSAM000005221298 0.5554328 angus angus
## 78 AANUSAM000005221298 AANUSAM000007501542 0.5554328 angus angus
## 79 UMCUSAU000000194560 HOLAUSM000A00009559 0.5543284 holstein holstein
## 80 HOLAUSM000A00009559 UMCUSAU000000194560 0.5543284 holstein holstein
## 81 UMCUSAU000000194622 JERUSAM000000651268 0.5540344 jersey jersey
## 82 JERUSAM000000651268 UMCUSAU000000194622 0.5540344 jersey jersey
## 83 UMCUSAU000000194726 UMCUSAU000000194258 0.5520806 angus angus
## 84 UMCUSAU000000194258 UMCUSAU000000194726 0.5520806 angus angus
## 85 UMCUSAM000000199730 UMCUSAF000000199724 0.5520187 hereford hereford
## 86 UMCUSAF000000199724 UMCUSAM000000199730 0.5520187 hereford hereford
## 87 UMCUSAU000000194702 UMCUSAU000000194234 0.5447748 angus angus
## 88 UMCUSAU000000194234 UMCUSAU000000194702 0.5447748 angus angus
## 89 UMCUSAU000000194723 UMCUSAU000000194255 0.5441425 angus angus
## 90 UMCUSAU000000194255 UMCUSAU000000194723 0.5441425 angus angus
## 91 HOLUSAM003125201927 HOL840M003131131453 0.5431097 holstein holstein
## 92 HOL840M003131131453 HOLUSAM003125201927 0.5431097 holstein holstein
## 93 HOLUSAM003128824393 HOL840M003131131453 0.5417542 holstein holstein
## 94 HOL840M003131131453 HOLUSAM003128824393 0.5417542 holstein holstein
## 95 UMCUSAU000000194728 UMCUSAU000000194261 0.5406755 angus angus
## 96 UMCUSAU000000194261 UMCUSAU000000194728 0.5406755 angus angus
## 97 UMCUSAF000000199727 UMCUSAF000000199724 0.5366674 hereford hereford
## 98 UMCUSAF000000199724 UMCUSAF000000199727 0.5366674 hereford hereford
## 99 UMCUSAU000000194182 HOLFRAM002998012650 0.5365281 holstein holstein
## 100 HOLFRAM002998012650 UMCUSAU000000194182 0.5365281 holstein holstein
## 101 UMCUSAU000000194697 UMCUSAU000000194229 0.5356164 angus angus
## 102 UMCUSAU000000194229 UMCUSAU000000194697 0.5356164 angus angus
## 103 UMCUSAU000000194260 UMCUSAM000000198520 0.5354116 angus angus
## 104 UMCUSAM000000198520 UMCUSAU000000194260 0.5354116 angus angus
## 105 UMCUSAU000000194696 UMCUSAU000000194228 0.5341569 angus angus
## 106 UMCUSAU000000194228 UMCUSAU000000194696 0.5341569 angus angus
## 107 UMCUSAU000000194733 UMCUSAU000000194265 0.5334909 angus angus
## 108 UMCUSAU000000194265 UMCUSAU000000194733 0.5334909 angus angus
## 109 UMCUSAM000000196770 HOLUSAM003126477819 0.5334516 holstein holstein
## 110 HOLUSAM003126477819 UMCUSAM000000196770 0.5334516 holstein holstein
## 111 UMCUSAU000000194716 UMCUSAU000000194248 0.5293155 angus angus
## 112 UMCUSAU000000194248 UMCUSAU000000194716 0.5293155 angus angus
## 113 UMCUSAU000000194782 UMCUSAU000000194796 0.5278592 hereford limousin
## 114 UMCUSAU000000194573 HOLCANM000000352790 0.5233847 holstein holstein
## 115 HOLCANM000000352790 UMCUSAU000000194573 0.5233847 holstein holstein
## 116 UMCUSAM000000196786 HOLUSAM003012560018 0.5207687 holstein holstein
## 117 HOLUSAM003012560018 UMCUSAM000000196786 0.5207687 holstein holstein
## 118 UMCUSAU000000194567 HOLAUSM000H01059976 0.5202426 holstein holstein
## 119 HOLAUSM000H01059976 UMCUSAU000000194567 0.5202426 holstein holstein
## 120 UMCUSAU000000194558 HOLAUSM000A00009209 0.5192653 holstein holstein
## 121 HOLAUSM000A00009209 UMCUSAU000000194558 0.5192653 holstein holstein
## 122 UMCUSAM000000196847 HOLUSAM000002070579 0.5192495 holstein holstein
## 123 HOLUSAM000002070579 UMCUSAM000000196847 0.5192495 holstein holstein
## 124 UMCUSAU000000194713 UMCUSAU000000194245 0.5188542 angus angus
## 125 UMCUSAU000000194245 UMCUSAU000000194713 0.5188542 angus angus
## 126 UMCUSAU000000194671 UMCUSAU000000194657 0.5179075 jersey jersey
## 127 UMCUSAU000000194657 UMCUSAU000000194671 0.5179075 jersey jersey
## 128 HOLUSAM003009329221 HOLCANM000109538236 0.5173219 holstein holstein
## 129 HOLCANM000109538236 HOLUSAM003009329221 0.5173219 holstein holstein
## 130 UMCUSAU000000194721 UMCUSAU000000194253 0.5159907 angus angus
## 131 UMCUSAU000000194253 UMCUSAU000000194721 0.5159907 angus angus
## 132 HOLUSAM003131131371 HOLCANM000109538236 0.5150506 holstein holstein
## 133 HOLCANM000109538236 HOLUSAM003131131371 0.5150506 holstein holstein
## 134 HOLCANM000109538236 HOL840M003131131371 0.5131684 holstein holstein
## 135 HOL840M003131131371 HOLCANM000109538236 0.5131684 holstein holstein
## 136 SIMUSAM000001937373 RANUSAM000003358146 0.5105202 simmental simmental
## 137 RANUSAM000003358146 SIMUSAM000001937373 0.5105202 simmental simmental
## 138 UMCUSAU000000194657 UMCUSAU000000194654 0.5055063 jersey jersey
## 139 UMCUSAU000000194654 UMCUSAU000000194657 0.5055063 jersey jersey
## 140 UMCUSAU000000194273 HERUSAF000042190680 0.5015844 hereford hereford
## 141 HERUSAF000042190680 UMCUSAU000000194273 0.5015844 hereford hereford
## 142 UMCUSAU000000194573 UMCUSAM000000196784 0.5014160 holstein holstein
## 143 UMCUSAM000000196784 UMCUSAU000000194573 0.5014160 holstein holstein
## 144 UMCUSAU000000194709 UMCUSAU000000194241 0.5003591 angus angus
## 145 UMCUSAU000000194241 UMCUSAU000000194709 0.5003591 angus angus
## 146 HOLUSAM000072495715 HOLUSAM000070626136 0.4974645 holstein holstein
## 147 HOLUSAM000070626136 HOLUSAM000072495715 0.4974645 holstein holstein
## 148 UMCUSAM000000196831 UMCUSAM000000196786 0.4971759 holstein holstein
## 149 UMCUSAM000000196786 UMCUSAM000000196831 0.4971759 holstein holstein
## 150 HOLUSAM000072495715 HOLUSAM000071451889 0.4958166 holstein holstein
## 151 HOLUSAM000071451889 HOLUSAM000072495715 0.4958166 holstein holstein
## 152 UMCUSAU000000194180 HOLFRAM002296001756 0.4939476 holstein holstein
## 153 HOLFRAM002296001756 UMCUSAU000000194180 0.4939476 holstein holstein
## 154 UMCUSAU000000194657 UMCUSAU000000194619 0.4925979 jersey jersey
## 155 UMCUSAU000000194619 UMCUSAU000000194657 0.4925979 jersey jersey
## 156 UMCUSAU000000194725 UMCUSAU000000194257 0.4925138 angus angus
## 157 UMCUSAU000000194257 UMCUSAU000000194725 0.4925138 angus angus
## 158 UMCUSAU000000194729 UMCUSAU000000194262 0.4912487 angus angus
## 159 UMCUSAU000000194262 UMCUSAU000000194729 0.4912487 angus angus
## 160 UMCUSAU000000194688 UMCUSAU000000194220 0.4904374 angus angus
## 161 UMCUSAU000000194220 UMCUSAU000000194688 0.4904374 angus angus
## 162 UMCUSAU000000194657 JER260M003126073776 0.4890576 jersey jersey
## 163 JER260M003126073776 UMCUSAU000000194657 0.4890576 jersey jersey
## 164 UMCUSAU000000194657 JERUSAM000000646877 0.4883554 jersey jersey
## 165 JERUSAM000000646877 UMCUSAU000000194657 0.4883554 jersey jersey
## 166 CHAUSAM000VPM122718 CHACANM0000RMC30160 0.4879357 charolais charolais
## 167 CHACANM0000RMC30160 CHAUSAM000VPM122718 0.4879357 charolais charolais
## 168 UMCUSAM000000196831 HOLUSAM003012560018 0.4871702 holstein holstein
## 169 HOLUSAM003012560018 UMCUSAM000000196831 0.4871702 holstein holstein
## 170 UMCUSAU000000194649 UMCUSAM000000033675 0.4867226 jersey jersey
## 171 UMCUSAM000000033675 UMCUSAU000000194649 0.4867226 jersey jersey
## 172 UMCUSAU000000194699 UMCUSAU000000194231 0.4858802 angus angus
## 173 UMCUSAU000000194231 UMCUSAU000000194699 0.4858802 angus angus
## 174 UMCUSAU000000194657 UMCUSAU000000194646 0.4846924 jersey jersey
## 175 UMCUSAU000000194646 UMCUSAU000000194657 0.4846924 jersey jersey
## 176 UMCUSAU000000194698 UMCUSAU000000194230 0.4844871 angus angus
## 177 UMCUSAU000000194230 UMCUSAU000000194698 0.4844871 angus angus
## 178 UMCUSAM000000196846 HOLUSAM000138122625 0.4803394 holstein holstein
## 179 HOLUSAM000138122625 UMCUSAM000000196846 0.4803394 holstein holstein
## 180 HOL840M003131131453 HOL840M003130854065 0.4793351 holstein holstein
## 181 HOL840M003130854065 HOL840M003131131453 0.4793351 holstein holstein
## 182 UMCUSAM000000196786 HOLUSAM000071451889 0.4783347 holstein holstein
## 183 HOLUSAM000071451889 UMCUSAM000000196786 0.4783347 holstein holstein
## 184 UMCUSAU000000194672 UMCUSAU000000194668 0.4772177 jersey jersey
## 185 UMCUSAU000000194668 UMCUSAU000000194672 0.4772177 jersey jersey
## 186 UMCUSAM000000196838 HOLUSAM003126477819 0.4751568 holstein holstein
## 187 HOLUSAM003126477819 UMCUSAM000000196838 0.4751568 holstein holstein
## 188 UMCUSAU000000194712 UMCUSAU000000194244 0.4746871 angus angus
## 189 UMCUSAU000000194244 UMCUSAU000000194712 0.4746871 angus angus
## 190 UMCUSAM000000196812 HOLUSAM000139383375 0.4707756 holstein holstein
## 191 HOLUSAM000139383375 UMCUSAM000000196812 0.4707756 holstein holstein
## 192 HOLUSAM003131131371 HOLUSAM003125201927 0.4705759 holstein holstein
## 193 HOLUSAM003125201927 HOLUSAM003131131371 0.4705759 holstein holstein
## 194 UMCUSAU000000194643 UMCUSAU000000194637 0.4705502 jersey jersey
## 195 UMCUSAU000000194637 UMCUSAU000000194643 0.4705502 jersey jersey
## 196 HOLCANM000109538236 HOL840M003130854065 0.4705488 holstein holstein
## 197 HOL840M003130854065 HOLCANM000109538236 0.4705488 holstein holstein
## 198 SIMUSAM000002144976 RANUSAM000003358146 0.4698128 simmental simmental
## 199 RANUSAM000003358146 SIMUSAM000002144976 0.4698128 simmental simmental
## 200 UMCUSAU000000194341 UMCUSAU000000194340 0.4691180 jersey jersey
## 201 UMCUSAU000000194340 UMCUSAU000000194341 0.4691180 jersey jersey
Removal
Charolais
keep <-
big_grm_long %>%
filter(ind1_pop == "charolais" & ind2_pop == "charolais") %>%
distinct(ind1) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
top_n(75, avg_coverage) %>%
arrange(desc(avg_coverage))
## filter: removed 1018963 rows (99%)
## distinct: removed 7482 rows (99%)
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
out <- c("CHAUSAM00000M225504", "CHAUSAM00000M319454", "CHAUSAM00000M234430", "UMCUSAU000000194368", "CHACANM0000RMC30160", "UMCUSAM000000092750", "CHAFRAM005882101816", "CHAUSAM00000M314744", "UMCUSAU000000194741", "UMCUSAU000000194786", "CHAFRAM005872122876", "CHAUSAM00000M434790", "UMCUSAU000000194843", "CHACANM0000MC236083", "CHACANM0000MC293022")
cha <- big_grm_long %>%
filter(ind1 %in% keep$ind1 & ind2 %in% keep$ind1) %>%
filter(ind1 != ind2) %>%
#filter(value > 0.12) %>%
filter(!ind1 %in% out) %>%
filter(!ind2 %in% out) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
select(ind1:value, avg_coverage, everything()) %>%
arrange(desc(value), desc(avg_coverage)) %>%
distinct(ind1) %>%
mutate(pop = "charolais")
## filter: removed 1020907 rows (99%)
## filter: removed 75 rows (1%)
## filter: removed 1110 rows (20%)
## filter: removed 900 rows (20%)
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## distinct: removed 3480 rows (98%)
## mutate: new variable 'pop' with one unique value and 0% NA
Holstein
keep <-
big_grm_long %>%
filter(ind1_pop == "holstein" & ind2_pop == "holstein") %>%
distinct(ind1) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
top_n(75, avg_coverage) %>%
arrange(desc(avg_coverage))
## filter: removed 965523 rows (94%)
## distinct: removed 60762 rows (>99%)
## left_join: added 3 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
out <- c("UMCUSAM000000196809", "UMCUSAU000000194573", "HOLUSAM000071451889", "HOL840M003128043644", "HOL840M003131131453", "HOL840M003129128755", "UMCUSAM000000196846", "HOLUSAM000072512148", "UMCUSAM000000196798", "UMCUSAM000000196831", "UMCUSAM000000196813", "UMCUSAM000000196810", "UMCUSAM000000196795")
hol <- big_grm_long %>%
filter(ind1 %in% keep$ind1 & ind2 %in% keep$ind1) %>%
filter(ind1 != ind2) %>%
#filter(value > 0.12) %>%
filter(!ind1 %in% out) %>%
filter(!ind2 %in% out) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
select(ind1:value, avg_coverage, everything()) %>%
arrange(desc(value), desc(avg_coverage)) %>%
distinct(ind1) %>%
mutate(pop = "holstein")
## filter: removed 1021203 rows (99%)
## filter: removed 73 rows (1%)
## filter: removed 936 rows (18%)
## filter: removed 780 rows (18%)
## left_join: added 118 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## distinct: removed 3598 rows (98%)
## mutate: new variable 'pop' with one unique value and 0% NA
Angus
keep <-
big_grm_long %>%
filter(ind1_pop == "angus" & ind2_pop == "angus") %>%
distinct(ind1) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
top_n(75, avg_coverage) %>%
arrange(desc(avg_coverage))
## filter: removed 952003 rows (93%)
## distinct: removed 74256 rows (>99%)
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
out <- c("AANUSAM000016447771", "AANUSAM000015330743", "AANUSAM000013009379", "AANUSAM000013544928", "AANUSAM000012452829", "AANUSAM000010239760", "AANUSAM000015899735", "AANUSAM000014844711", "AANUSAM000011160685", "AANUSAM000008505294", "AANUSAM000010848986", "UMCUSAF000000118765", "AANUSAM000014056739", "AANUSAM000011105489", "AANUSAM000007187001")
an <- big_grm_long %>%
filter(ind1 %in% keep$ind1 & ind2 %in% keep$ind1) %>%
filter(ind1 != ind2) %>%
#filter(value > 0.12) %>%
filter(!ind1 %in% out) %>%
filter(!ind2 %in% out) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
select(ind1:value, avg_coverage, everything()) %>%
arrange(desc(value), desc(avg_coverage)) %>%
distinct(ind1) %>%
mutate(pop = "angus")
## filter: removed 1020907 rows (99%)
## filter: removed 75 rows (1%)
## filter: removed 1110 rows (20%)
## filter: removed 900 rows (20%)
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## distinct: removed 3480 rows (98%)
## mutate: new variable 'pop' with one unique value and 0% NA
Hereford
keep <-
big_grm_long %>%
filter(ind1_pop == "hereford" & ind2_pop == "hereford") %>%
distinct(ind1) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
top_n(75, avg_coverage) %>%
arrange(desc(avg_coverage))
## filter: removed 1018251 rows (99%)
## distinct: removed 8190 rows (99%)
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
out <- c("UMCUSAF000000199724", "UMCUSAF000000199737", "HERCANM000C02698670", "HERUSAM000042361822", "UMCUSAF000000199727", "HERCANM000C02738219", "HERUSAM000041113279", "UMCUSAM000000199725", "UMCUSAF000000199728", "UMCUSAF000000199720", "HERCANM000C02789138", "UMCUSAU000000194350", "HERUSAM000042593689", "UMCUSAF000000199734", "HERCANM000C02020446")
hfd <- big_grm_long %>%
filter(ind1 %in% keep$ind1 & ind2 %in% keep$ind1) %>%
filter(ind1 != ind2) %>%
#filter(value > 0.12) %>%
filter(!ind1 %in% out) %>%
filter(!ind2 %in% out) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
select(ind1:value, avg_coverage, everything()) %>%
arrange(desc(value), desc(avg_coverage)) %>%
distinct(ind1) %>%
mutate(pop = "hereford")
## filter: removed 1020907 rows (99%)
## filter: removed 75 rows (1%)
## filter: removed 1110 rows (20%)
## filter: removed 900 rows (20%)
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## distinct: removed 3480 rows (98%)
## mutate: new variable 'pop' with one unique value and 0% NA
Simmental
keep <-
big_grm_long %>%
filter(ind1_pop == "simmental" & ind2_pop == "simmental") %>%
distinct(ind1) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
top_n(75, avg_coverage) %>%
arrange(desc(avg_coverage))
## filter: removed 1009108 rows (98%)
## distinct: removed 17292 rows (99%)
## left_join: added one row and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
out <- c("SIMUSAM000001907954", "RANUSAM000003358146", "SIMUSAM000002068996", "SIMUSAM000002240471", "SIMCANM000000287068", "SIMUSAM000002144976", "UMCUSAU000000198090", "SIMCANM000000172281", "SIMUSAM000002081939", "SIMUSAM000002002092", "UMCUSAU000000194365", "SIMUSAM000001282876", "SIMUSAM000001716917", "UMCUSAU000000198091")
sim <- big_grm_long %>%
filter(ind1 %in% keep$ind1 & ind2 %in% keep$ind1) %>%
filter(ind1 != ind2) %>%
#filter(value > 0.12) %>%
filter(!ind1 %in% out) %>%
filter(!ind2 %in% out) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
select(ind1:value, avg_coverage, everything()) %>%
arrange(desc(value), desc(avg_coverage)) %>%
distinct(ind1) %>%
mutate(pop = "simmental")
## filter: removed 1021056 rows (99%)
## filter: removed 74 rows (1%)
## filter: removed 1022 rows (19%)
## filter: removed 840 rows (19%)
## left_join: added 59 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## distinct: removed 3539 rows (98%)
## mutate: new variable 'pop' with one unique value and 0% NA
Jersey
keep <-
big_grm_long %>%
filter(ind1_pop == "jersey" & ind2_pop == "jersey") %>%
distinct(ind1) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
top_n(75, avg_coverage) %>%
arrange(desc(avg_coverage))
## filter: removed 1016928 rows (99%)
## distinct: removed 9506 rows (99%)
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
out <- c("JERUSAM000000652501", "JERUSAM000000646877", "UMCUSAU000000194623", "JERAUSM000A00003096", "JERUSAM000000649797", "JERUSAM000000665185", "JERUSAM000000651268", "UMCUSAU000000194671", "UMCUSAU000000194657", "UMCUSAU000000194672", "UMCUSAU000000194637", "UMCUSAU000000194341", "UMCUSAU000000194335", "UMCUSAU000000194662", "UMCUSAU000000194652")
jer <- big_grm_long %>%
filter(ind1 %in% keep$ind1 & ind2 %in% keep$ind1) %>%
filter(ind1 != ind2) %>%
#filter(value > 0.12) %>%
filter(!ind1 %in% out) %>%
filter(!ind2 %in% out) %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
select(ind1:value, avg_coverage, everything()) %>%
arrange(desc(value), desc(avg_coverage)) %>%
distinct(ind1) %>%
mutate(pop = "jersey")
## filter: removed 1020907 rows (99%)
## filter: removed 75 rows (1%)
## filter: removed 1110 rows (20%)
## filter: removed 900 rows (20%)
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## distinct: removed 3480 rows (98%)
## mutate: new variable 'pop' with one unique value and 0% NA
Re-evaluate post-pruning
grm_prune <-
bind_rows(an, cha, hol, hfd, jer, sim)
big_grm_prune <-
big_grm_long %>%
filter(ind1 %in% grm_prune$ind1) %>%
filter(ind2 %in% grm_prune$ind1)
## filter: removed 652492 rows (64%)
## filter: removed 244440 rows (65%)
big_grm_prune %>%
ggplot(aes(x = ind1,
y = ind2,
fill = value)) +
geom_tile() +
viridis::scale_fill_viridis(
option = "inferno",
direction = -1) +
theme(
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank()
) +
labs(
x = "Individual 1",
y = "Individual 2",
title = str_c("Pairwise genomic relatedness post-pruning: multi-breed (all individuals)")
)

ggsave(here::here(str_c("figures/sample_selection/big_grm_pruned.png")))
## Saving 7 x 5 in image
map(list("holstein", "angus", "simmental", "jersey", "hereford", "charolais"), plot_big_grm_prune)
## filter: removed 108000 rows (83%)
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%)
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%)
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%)
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%)
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%)
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
Plot distribution of post-pruning GRM values
big_grm_prune %>%
filter(ind1_pop == ind2_pop) %>%
filter(ind1 != ind2) %>%
mutate(ind1_pop = str_to_title(ind1_pop)) %>%
ggplot(aes(x = value,
y = forcats::fct_reorder(ind1_pop, value, mean),
#fill = forcats::fct_reorder(ind1_pop, value, mean, .desc = TRUE))) +
fill = ind1_pop)) +
ggridges::geom_density_ridges(alpha = 0.5) +
guides(fill = FALSE) +
geom_vline(aes(xintercept = mean(value))) +
labs(x = "GRM value", y = "Kernel density", title = str_wrap("Density of pairwise genomic relatedness by breed post-pruning", width = 50))
## filter: removed 108000 rows (83%)
## filter: removed 360 rows (2%)
## mutate: changed 21240 values (100%) of 'ind1_pop' (0 new NA)
## Picking joint bandwidth of 0.00924

cowplot::ggsave(here::here("figures/sample_selection/prune_grm_breed_density.png"), width = 10, height = 7, dpi = 500)
## Picking joint bandwidth of 0.00924
- Actually increased mean relatedness? How did that happen?
big_grm_long %>%
filter(ind1_pop == ind2_pop) %>%
filter(!ind1_pop %in% c("brahman", "limousin")) %>%
filter(!ind2_pop %in% c("brahman", "limousin")) %>%
bind_rows(big_grm_prune %>%
filter(ind1_pop == ind2_pop) %>%
mutate(prune = "Post")) %>%
filter(ind1 != ind2) %>%
group_by(prune) %>%
summarise(mean_rel = mean(value),
min_rel = min(value),
max_rel = max(value))
## filter: removed 844516 rows (82%)
## filter: removed 3600 rows (2%)
## filter: no rows removed
## filter: removed 108000 rows (83%)
## mutate: new variable 'prune' with one unique value and 0% NA
## filter: removed 1288 rows (1%)
## group_by: 0 groups []
## # A tibble: 2 x 4
## prune mean_rel min_rel max_rel
## <chr> <dbl> <dbl> <dbl>
## 1 <NA> 0.0745 -0.100 1.27
## 2 Post 0.0896 -0.100 0.400
big_grm_long %>%
filter(ind1_pop == ind2_pop) %>%
filter(!ind1_pop %in% c("brahman", "limousin")) %>%
filter(!ind2_pop %in% c("brahman", "limousin")) %>%
mutate(prune = "Pre") %>%
bind_rows(big_grm_prune %>%
filter(ind1_pop == ind2_pop) %>%
mutate(prune = "Post")) %>%
filter(ind1 != ind2) %>%
mutate(ind1_pop = str_to_title(ind1_pop)) %>%
ggplot(aes(x = value, fill = prune)) +
geom_density(alpha = 0.5) +
labs(x = "GRM value", y = "Kernel density", fill = "", title = "Density of pairwise genomic relatedness post-pruning")
## filter: removed 844516 rows (82%)
## filter: removed 3600 rows (2%)
## filter: no rows removed
## mutate: new variable 'prune' with one unique value and 0% NA
## filter: removed 108000 rows (83%)
## mutate: new variable 'prune' with one unique value and 0% NA
## filter: removed 1288 rows (1%)
## mutate: changed 198728 values (100%) of 'ind1_pop' (0 new NA)

cowplot::ggsave(here::here("figures/sample_selection/prune_grm_all_density.png"), width = 10, height = 7, dpi = 500)
big_grm_long %>%
mutate(prune = "Pre") %>%
bind_rows(big_grm_prune %>%
mutate(prune = "Post")) %>%
filter(ind1 != ind2 & ind1_pop == ind2_pop) %>%
filter(!ind1_pop %in% c("brahman", "limousin")) %>%
filter(!ind2_pop %in% c("brahman", "limousin")) %>%
group_by(prune, ind1) %>%
summarise(
n_hi = sum(value > 0.12),
mean_rel = mean(value),
max_rel = max(value),
min_rel = min(value)
)
%>%
left_join(breed_key, by = c("ind1" = "id")) %>%
ggplot(aes(x = n_hi,
fill = prune)) +
geom_histogram(bins = 8, alpha = 0.5) +
facet_wrap(~ pop)
big_grm_prune %>%
filter(!ind1_pop %in% c("brahman", "limousin")) %>%
filter(!ind2_pop %in% c("brahman", "limousin")) %>%
filter(ind1 != ind2) %>%
group_by(ind1) %>%
summarise(
n_hi = sum(value > 0.12),
mean_rel = mean(value),
max_rel = max(value),
min_rel = min(value)
) %>%
arrange(desc(n_hi)) %>%
left_join(breed_key, by = c("ind1" = "id")) %>%
left_join(cov_avg, by = c("ind1" = "sid")) %>%
mutate(pop = str_to_title(pop)) %>%
ggplot(aes(x = n_hi,
#fill = forcats::fct_reorder(pop, n_hi, mean, .desc = TRUE))) +
fill = pop)) +
guides(fill = FALSE) +
geom_histogram(bins = 8) +
facet_wrap(~ forcats::fct_reorder(pop, n_hi, mean, .desc = TRUE)) +
labs(title = "Number of 'influential individuals' by breed", y = "Number of individuals", x = str_wrap("Number of pairwise comparisons where value > 0.12, by individual", width = 50))
cowplot::ggsave(here::here("figures/sample_selection/big_grm_n_influential.png"), width = 10, height = 7, dpi = 500)
Defunct: By breed-specific GRM
grm_long <-
melt_grm("angus") %>%
bind_rows(melt_grm("limousin")) %>%
bind_rows(melt_grm("holstein")) %>%
bind_rows(melt_grm("hereford")) %>%
bind_rows(melt_grm("simmental")) %>%
bind_rows(melt_grm("jersey")) %>%
bind_rows(melt_grm("charolais")) %>%
bind_rows(melt_grm("brahman"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## X274 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X274)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: new variable 'pop' with one unique value and 0% NA
## Parsed with column specification:
## cols(
## .default = col_double(),
## X52 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X52)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
## .default = col_double(),
## X248 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X248)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
## .default = col_double(),
## X92 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X92)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
## .default = col_double(),
## X133 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X133)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
## .default = col_double(),
## X99 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X99)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
## .default = col_double(),
## X88 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X88)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
## .default = col_double(),
## X61 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X61)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6)
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
Plot breed specific GRM
map(list("holstein", "angus", "simmental", "jersey", "hereford", "charolais", "brahman", "limousin"), plot_grm)
## filter: removed 123608 rows (67%)
## Saving 7 x 5 in image
## filter: removed 110088 rows (60%)
## Saving 7 x 5 in image
## filter: removed 167193 rows (91%)
## Saving 7 x 5 in image
## filter: removed 175013 rows (95%)
## Saving 7 x 5 in image
## filter: removed 176336 rows (96%)
## Saving 7 x 5 in image
## filter: removed 177048 rows (96%)
## Saving 7 x 5 in image
## filter: removed 181017 rows (98%)
## Saving 7 x 5 in image
## filter: removed 182016 rows (99%)
## Saving 7 x 5 in image
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
- Likely duplicate Charolais samples
- Should probably treat lowline Angus as a separate population
grm_long %>%
left_join(approved, by = c("ind1" = "international_id")) %>%
filter(ind1 != ind2) %>%
top_n(200, value) %>%
arrange(desc(value)) %>%
writexl::write_xlsx(here::here("data/derived_data/sample_selection/bs_top200_nodiag.xlsx"))
## left_join: added 873 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 1043 rows (1%)
Defunct: Pairwise relatedness via kinship coefficient
kin <-
list.files(here::here("data/derived_data/sample_selection/ds_relatedness2/"),
full.names = TRUE) %>%
#Name the elements of the list based on a stripped down version of the filepath
purrr::set_names(nm = (basename(.) %>%
tools::file_path_sans_ext())) %>%
purrr::map_df(read_table2, .id = "source") %>%
mutate(source = str_remove(source, "\\.ds_relatedness2")) %>%
rename(ind1 = INDV1, ind2 = INDV2, phi = RELATEDNESS_PHI, source_pop = source)
## mutate: changed 184617 values (100%) of 'source' (0 new NA)
Plot kinship coefficients
map(list("holstein", "angus", "simmental", "jersey", "hereford", "charolais", "brahman", "limousin"), plot_kin)
## filter: removed 123608 rows (67%)
## filter: removed 110088 rows (60%)
## filter: removed 167193 rows (91%)
## filter: removed 175013 rows (95%)
## filter: removed 176336 rows (96%)
## filter: removed 177048 rows (96%)
## filter: removed 181017 rows (98%)
## filter: removed 182016 rows (99%)
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
Plotting highly “connected” individuals vs. coverage
Confirmed that I’m not going to screw anything coverage-wise by first removing highly connected individuals
map(list("holstein", "angus", "simmental", "jersey", "hereford", "charolais", "brahman", "limousin"), plot_kin_cov)
## filter: removed 123855 rows (67%)
## distinct: removed 30381 rows (50%)
## group_by: 0 groups []
## left_join: added 3 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 110361 rows (60%)
## distinct: removed 37128 rows (50%)
## group_by: 0 groups []
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 167325 rows (91%)
## distinct: removed 8646 rows (50%)
## group_by: 0 groups []
## left_join: added one row and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 175111 rows (95%)
## distinct: removed 4753 rows (50%)
## group_by: 0 groups []
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## Warning: Removed 1 rows containing missing values (geom_point).
## filter: removed 176427 rows (96%)
## distinct: removed 4095 rows (50%)
## group_by: 0 groups []
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 177135 rows (96%)
## distinct: removed 3741 rows (50%)
## group_by: 0 groups []
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 181077 rows (98%)
## distinct: removed 1770 rows (50%)
## group_by: 0 groups []
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 182067 rows (99%)
## distinct: removed 1275 rows (50%)
## group_by: 0 groups []
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
Plot distribution of kinship values
kin %>%
mutate(source_pop = str_to_title(source_pop)) %>%
ggplot(aes(x = phi, y = forcats::fct_reorder(source_pop, phi, mean), fill = forcats::fct_reorder(source_pop, phi, mean, .desc = TRUE))) +
ggridges::geom_density_ridges(alpha = 0.5) +
geom_vline(aes(xintercept = mean(phi))) +
labs(x = "Kinship coefficient", y = "Kernel density", fill = "Breed", title = "Density of pairwise kinship coefficient by breed")
## mutate: changed 184617 values (100%) of 'source_pop' (0 new NA)
## Picking joint bandwidth of 0.0174

cowplot::ggsave(here::here("figures/sample_selection/kinship_density.png"), width = 7, height = 10, dpi = 500)
## Picking joint bandwidth of 0.0174